Scripts to run CAVIAR, FINEMAP (version 1.1) and DAP-G

knitr::opts_chunk$set(error = TRUE) # Knit even with errors

# devtools::install_local("geneRefineR/", force = T)
# library("echolocatoR")
source("echolocatoR/R/functions.R") 

library(readxl)
library(DT)
library(data.table)
library(dplyr)
library(ggplot2)
library(plotly)
library(cowplot)
library(ggrepel)
library(curl)
library(biomaRt) 
# Ensembl LD API
# library(httr)
# library(jsonlite)
# library(xml2)
# library(gaston)
# library(RCurl) 
# *** susieR ****
# library(knitrBootstrap) #install_github('jimhester/knitrBootstrap')
library(susieR) # devtools::install_github("stephenslab/susieR")
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
## 
## Matrix products: default
## BLAS/LAPACK: /hpc/packages/minerva-common/intel/parallel_studio_xe_2018/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] susieR_0.6.2.0411  biomaRt_2.38.0     tidyr_0.8.1       
##  [4] gaston_1.5.4       RcppParallel_4.4.2 Rcpp_1.0.1        
##  [7] curl_3.2           ggrepel_0.8.0      cowplot_0.9.3     
## [10] plotly_4.7.1       ggplot2_3.1.0      dplyr_0.7.6       
## [13] data.table_1.12.2  DT_0.4             readxl_1.1.0      
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-35      prettyunits_1.0.2    assertthat_0.2.0    
##  [4] rprojroot_1.3-2      digest_0.6.18        R6_2.4.0            
##  [7] cellranger_1.1.0     plyr_1.8.4           backports_1.1.2     
## [10] stats4_3.5.1         RSQLite_2.1.1        evaluate_0.11       
## [13] httr_1.3.1           pillar_1.3.1         rlang_0.3.1         
## [16] progress_1.2.0       lazyeval_0.2.1       blob_1.1.1          
## [19] S4Vectors_0.20.1     Matrix_1.2-14        rmarkdown_1.10      
## [22] stringr_1.4.0        htmlwidgets_1.2      RCurl_1.95-4.11     
## [25] bit_1.1-14           munsell_0.5.0        compiler_3.5.1      
## [28] pkgconfig_2.0.2      BiocGenerics_0.28.0  htmltools_0.3.6     
## [31] tidyselect_0.2.4     expm_0.999-2         tibble_2.0.1        
## [34] IRanges_2.16.0       XML_3.98-1.12        viridisLite_0.3.0   
## [37] crayon_1.3.4         withr_2.1.2          bitops_1.0-6        
## [40] grid_3.5.1           jsonlite_1.5         gtable_0.2.0        
## [43] DBI_1.0.0            magrittr_1.5         scales_1.0.0        
## [46] stringi_1.3.1        bindrcpp_0.2.2       tools_3.5.1         
## [49] bit64_0.9-7          Biobase_2.42.0       glue_1.3.0          
## [52] purrr_0.2.5          hms_0.4.2            parallel_3.5.1      
## [55] yaml_2.1.19          AnnotationDbi_1.44.0 colorspace_1.3-2    
## [58] memoise_1.1.0        knitr_1.20           bindr_0.1.1
print(paste("susieR ", packageVersion("susieR")))  
## [1] "susieR  0.6.2.411"
# MINERVA PATHS TO SUMMARY STATISTICS DATASETS: 
Data_dirs <- read.csv("./Results/directories_table.csv")

# Direct to local data files if not working on Minerva
if(getwd()=="/Users/schilder/Desktop/Fine_Mapping"){
  vcf_folder = F # Use internet if I'm working from my laptop
} else{
  vcf_folder = F#"../1000_Genomes_VCFs/Phase1" # Use previously downloaded files if working on Minerva node
}  

force_new_subset = F
allResults <- list()

CAVIARBF

# system("wget https://bitbucket.org/Wenan/caviarbf/get/7e428645be5e.zip") 
# system("cd CAVIARBF")
# system("make")
# install.packages("echolocatoR/tools/CAVIARBF/caviarbf-r-package/caviarbf_0.2.1.tar.gz", repos = NULL, type = "source") 
library(caviarbf)
# caviarbfFineMapping( )
kunkle <-fread("Data/Alzheimers/Kunkle_2019/PTK2B_Kunkle_2019_subset.txt")
kunkle = subset(kunkle, select = c("MarkerName","Beta","SE"))
system("./caviarbf -z ./example/myfile.Z -r ./example/myfile.LD -t 0 -a 0.1281429 -n 2000 -c 5 -o ./example/myfile.sigma0.1281429.bf")

Parkinson’s Disease

Nalls et. al. (2019) w/ 23andMe

Rare coding variant burden analysis:
We performed kernel-based burden tests on the 113 genes in our PD GWAS loci that contained two or more rare coding variants (MAF< 5% or MAF < 1%). After Bonferroni correction for 113 genes, we identified 7 significant putatively causal genes: - LRRK2, GBA, CATSPER3 (rs11950533/C5orf24 locus) - LAMB2 (rs12497850/IP6K2 locus) - LOC442028 (rs2042477/KCNIP3 locus) - NFKB2 (rs10748818/GBF1 locus), and SCARB2 (rs6825004 locus).”

– from Nalls et al. (2019)

dataset_name <- "Nalls_23andMe"

top_SNPs <- Nalls_top_SNPs <- import_topSNPs(
  file_path = Directory_info(dataset_name, "topSumStats"),
  chrom_col = "CHR", position_col = "BP", snp_col="SNP",
  pval_col="P, all studies", effect_col="Beta, all studies", gene_col="Nearest Gene",
  caption= "Nalls et al. (2018) w/ 23andMe PD GWAS Summary Stats")

gene_list <- c("LRRK2")# "LRRK2_alt","SNCA","VPS13C","C5orf24","IP6K2","KCNIP3","GBF1","SCARB2")#, top_SNPs$Gene) %>% unique()
allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=gene_list, query_by="coordinates", subset_path = "auto",
                 fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                 # fullSS_path = "Data/Parkinsons/Nalls_23andMe/nallsEtAl2019_allSamples_allVariants.mod.txt", 
                 chrom_col = "CHR", position_col = "POS", snp_col = "RSID",
                 pval_col = "p", effect_col = "beta", stderr_col = "se", 
                 vcf_folder = vcf_folder, superpopulation = "EUR", force_new_subset = force_new_subset,
                 LD_block = T, block_size = .7)

LRRK2

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/LRRK2_EUR_combined_meta_subset.txt …

Subset file looks good! :) + Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/LRRK2_subset.vcf

—— Importing VCF as bed file… ——

++++++++++ Reading in BIM file… ++++++++++

++++++++++ Calculating LD ++++++++++

++++++++++ LD matrix calculated in 2.05 seconds. ++++++++++

Returning lead SNP’s block…

++++++++++ Calculating LD blocks… ++++++++++

Number of SNPs in LD block = 147 + Fine mapping with SusieR… [1] “objective:-182.231837554327” [1] “objective:-171.131320050272” [1] “objective:-170.966063306093” [1] “objective:-170.963980624951” [1] “objective:-170.963966276454” [1] “objective:-170.963966177622”

Credible Set:

****** 1 SNPs included in Credible Set ******

LRRK2 fine-mapped in 25.11 seconds

manhattan_plot(
               subset_path= get_subset_path(fullSS_path=Directory_info(dataset_name, "fullSumStats"), gene="LRRK2" ),
               # subset_path="./Data/Parkinsons/Nalls_23andMe/LRRK2_EUR_Nalls_23andMe_subset.txt", 
               gene="LRRK2", SNP_list = c("rs76904798","rs34637584","rs117073808"), alt_color_SNPs=c("rs34637584"))

Alzheimer’s Disease

Posthuma (2018)

dataset_name <- "Posthuma_2018"

top_SNPs <- import_topSNPs(
  file_path = Directory_info(dataset_name, "topSumStats"),
  sheet = "Table S8",
  chrom_col = "Chr", position_col = "bp", snp_col="SNP",
  pval_col="P", effect_col="Zscore", gene_col="nearestGene",
  caption= "Posthuma et al. (2018) AD GWAS Summary Stats")

allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),query_by="coordinates", subset_path = "auto",
                 fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                 #"Data/Alzheimers/Posthuma_2018/phase3.beta.se.hrc.txt",#
                 effect_col = "BETA", stderr_col = "SE", position_col = "BP",
                 snp_col = "SNP", pval_col = "P",
                 vcf_folder = vcf_folder, superpopulation = "EUR", force_new_subset = force_new_subset,
                 LD_block = T)

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /sc/orga/projects/ad-omics/data/AD_GWAS/Posthuma_2018/PTK2B_EUR_Posthuma_2018_subset.txt …

Subset file looks good! :) + Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf

—— Importing VCF as bed file… ——

++++++++++ Reading in BIM file… ++++++++++

++++++++++ Calculating LD ++++++++++

++++++++++ LD matrix calculated in 1.41 seconds. ++++++++++

Returning lead SNP’s block…

++++++++++ Calculating LD blocks… ++++++++++

Number of SNPs in LD block = 32 + Fine mapping with SusieR… [1] “objective:-39.6297333947306” [1] “objective:-35.1183629950145” [1] “objective:-34.6205003513782” [1] “objective:-34.5753217845995” [1] “objective:-34.5737853142779” [1] “objective:-34.5737329719813” [1] “objective:-34.5737312795636” [1] “objective:-34.5737312248382”

Credible Set:

****** 1 SNPs included in Credible Set ******

PTK2B fine-mapped in 33.9 seconds

Marioni (2018)

dataset_name <- "Marioni_2018"

top_SNPs <- import_topSNPs(
  file_path = Directory_info(dataset_name, "topSumStats"),
  sheet = "Table S9",
  chrom_col = "topSNP_chr", position_col = "topSNP_bp", snp_col="topSNP",
  pval_col="p_GWAS", effect_col="b_GWAS", gene_col="Gene",
  caption= "Marioni et al. (2018) AD GWAS Summary Stats")

allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),query_by="coordinates", subset_path = "auto",
                 fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                 effect_col = "BETA", stderr_col = "SE", position_col = "POS", 
                 snp_col = "SNP",chrom_col = "CHROM", pval_col = "PVAL",
                 vcf_folder = vcf_folder, superpopulation = "EUR",force_new_subset = force_new_subset,
                 LD_block = T)

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /sc/orga/projects/ad-omics/data/AD_GWAS/Marioni_2018/PTK2B_EUR_Marioni_2018_subset.txt …

Subset file looks good! :) + Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf

—— Importing VCF as bed file… ——

++++++++++ Reading in BIM file… ++++++++++

++++++++++ Calculating LD ++++++++++

++++++++++ LD matrix calculated in 0.41 seconds. ++++++++++

Returning lead SNP’s block…

++++++++++ Calculating LD blocks… ++++++++++

Number of SNPs in LD block = 116

  • Fine mapping with SusieR… [1] “objective:-157.507867734841” [1] “objective:-155.863123093353” [1] “objective:-155.842147145117” [1] “objective:-155.841896836129” [1] “objective:-155.841894478557” [1] “objective:-155.841894456342”

Credible Set:

****** 2 SNPs included in Credible Set ******

PTK2B fine-mapped in 10.74 seconds

Lambert (2013) (IGAP)

Filtering out the CLU portion of the locus prevents susieR from identifying a credible set.

dataset_name <- "Lambert_2013"

top_SNPs <- import_topSNPs(
  file_path = Directory_info(dataset_name, "topSumStats"),
  sheet = 1,
  chrom_col = "CHR", position_col = "Position", snp_col="SNP",
  pval_col="Overall_P", effect_col="Overall_OR", gene_col="Closest gene",
  caption= "Lambert (2013) (IGAP): AD GWAS Summary Stats")
# genes <- snps_to_genes(snp_list=top_SNPs$SNP )

allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"), query_by="coordinates", subset_path = "auto",
                   fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                   chrom_col = "CHROM",  pval_col = "PVAL", snp_col = "SNP",
                   effect_col = "BETA", stderr_col = "SE", position_col = "POS", 
                   vcf_folder = vcf_folder, num_causal = 1, superpopulation = "EUR", 
                   force_new_subset = force_new_subset,
                   LD_block = T)  

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /sc/orga/projects/ad-omics/data/AD_GWAS/Lambert_2013/PTK2B_EUR_Lambert_2013_subset.txt …

Subset file looks good! :) + Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf

—— Importing VCF as bed file… ——

++++++++++ Reading in BIM file… ++++++++++

++++++++++ Calculating LD ++++++++++

++++++++++ LD matrix calculated in 0.33 seconds. ++++++++++

Returning lead SNP’s block…

++++++++++ Calculating LD blocks… ++++++++++

Number of SNPs in LD block = 40 + Fine mapping with SusieR… [1] “objective:-51.8834738945391” [1] “objective:-48.8484412897536” [1] “objective:-48.6075367226916” [1] “objective:-48.5920325669939” [1] “objective:-48.5915756859886” [1] “objective:-48.5915622933929”

Credible Set:

****** 2 SNPs included in Credible Set ******

PTK2B fine-mapped in 9.7 seconds

Kunkle (2019) (IGAP)

dataset_name <- "Kunkle_2019"

top_SNPs <- import_topSNPs(
  file_path = Directory_info(dataset_name, "topSumStats"),
  sheet = "Supplementary Table 6",
  chrom_col = "Chr", position_col = "Basepair", snp_col="SNP",
  pval_col="P", effect_col="Beta", gene_col="LOCUS",
  caption= "Kunkle (2019) (IGAP): AD GWAS Summary Stats")
# genes <- snps_to_genes(snp_list=top_SNPs$SNP )

allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"), query_by="coordinates", subset_path = "auto",
                   fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                   # fullSS_path="Data/Alzheimers/Kunkle_2019/Kunkle_etal_Stage1_results.txt",
                   chrom_col = "Chromosome",  position_col = "Position", pval_col = "Pvalue",
                   snp_col = "MarkerName", effect_col = "Beta", stderr_col = "SE",
                   vcf_folder = vcf_folder, num_causal = 1, superpopulation = "EUR", 
                   file_sep=" ", force_new_subset = force_new_subset,
                   LD_block = T)   #maxPos=27440000,

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /sc/orga/projects/ad-omics/data/AD_GWAS/Kunkle_2019/PTK2B_EUR_Kunkle_2019_subset.txt …

Subset file looks good! :) + Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf

—— Importing VCF as bed file… ——

++++++++++ Reading in BIM file… ++++++++++

++++++++++ Calculating LD ++++++++++

++++++++++ LD matrix calculated in 1.13 seconds. ++++++++++

Returning lead SNP’s block…

++++++++++ Calculating LD blocks… ++++++++++

Number of SNPs in LD block = 52 + Fine mapping with SusieR… [1] “objective:-67.2834173039148” [1] “objective:-64.5473940434646” [1] “objective:-64.3422618611751” [1] “objective:-64.3269573087374” [1] “objective:-64.3261843743215” [1] “objective:-64.3261453315648”

Credible Set:

****** 4 SNPs included in Credible Set ******

PTK2B fine-mapped in 29.78 seconds

eQTL

  • AFR = AFA = African
  • AMR = Ad Mixed American
  • EAS = East Asian
  • EUR = CAU = European
  • SAS = South Asian
  • HIS = Hispanic

MESA - AFA

dataset_name <- "MESA_AFA" 

allResults[[dataset_name]] <- finemap_geneList( geneList=c("LRRK2"),  superpopulation = "CAU",
                                                top_SNPs = "auto", query_by = "gene",
                   fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                   chrom_col = "chr", position_col = "pos_snps", snp_col="snps",
                   pval_col="pvalue", effect_col="beta", gene_col="gene_name", 
                   stderr_col = "calculate",  tstat_col = "statistic",
                   vcf_folder = vcf_folder, num_causal = 1, 
                   force_new_subset = force_new_subset,
                   LD_block = T)

LRRK2

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /sc/orga/projects/ad-omics/data/MESA/LRRK2_EUR_MESA_subset.txt …

Subset file looks good! :) + Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/LRRK2_subset.vcf

—— Importing VCF as bed file… ——

++++++++++ Reading in BIM file… ++++++++++

++++++++++ Calculating LD ++++++++++

++++++++++ LD matrix calculated in 1.61 seconds. ++++++++++

Returning lead SNP’s block…

++++++++++ Calculating LD blocks… ++++++++++

Number of SNPs in LD block = 0

  • Fine mapping with SusieR…
## Error in eigen(R, only.values = TRUE): 0 x 0 matrix

MESA - CAU

dataset_name <- "MESA_CAU" 

allResults[[dataset_name]] <- finemap_geneList( geneList=c("LRRK2"),  superpopulation = "CAU",
                                                top_SNPs = "auto", query_by = "gene",
                   fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                   # fullSS_path = "Data/eQTL/MESA/CAU_cis_eqtl_summary_statistics.txt",
                   chrom_col = "chr", position_col = "pos_snps", snp_col="snps",
                   pval_col="pvalue", effect_col="beta", gene_col="gene_name", 
                   stderr_col = "calculate",  tstat_col = "statistic",
                   vcf_folder = vcf_folder, num_causal = 1, 
                   force_new_subset = force_new_subset,
                   LD_block = T)

LRRK2

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /sc/orga/projects/ad-omics/data/MESA/LRRK2_EUR_MESA_subset.txt …

Subset file looks good! :) + Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/LRRK2_subset.vcf

—— Importing VCF as bed file… ——

++++++++++ Reading in BIM file… ++++++++++

++++++++++ Calculating LD ++++++++++

++++++++++ LD matrix calculated in 1.61 seconds. ++++++++++

Returning lead SNP’s block…

++++++++++ Calculating LD blocks… ++++++++++

Number of SNPs in LD block = 0

  • Fine mapping with SusieR…
## Error in eigen(R, only.values = TRUE): 0 x 0 matrix

MESA - HIS

Used the Ad Mixed American (AMR) reference panel given the absence of a Hispanic reference panel for 1KG_Phase1.

dataset_name <- "MESA_HIS" 

allResults[[dataset_name]] <- finemap_geneList( geneList=c("LRRK2"),  superpopulation = "CAU",
                                                top_SNPs = "auto", query_by = "gene",
                   fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                   chrom_col = "chr", position_col = "pos_snps", snp_col="snps",
                   pval_col="pvalue", effect_col="beta", gene_col="gene_name", 
                   stderr_col = "calculate",  tstat_col = "statistic",
                   vcf_folder = vcf_folder, num_causal = 1, 
                   force_new_subset = force_new_subset,
                   LD_block = T)

LRRK2

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /sc/orga/projects/ad-omics/data/MESA/LRRK2_EUR_MESA_subset.txt …

Subset file looks good! :) + Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/LRRK2_subset.vcf

—— Importing VCF as bed file… ——

++++++++++ Reading in BIM file… ++++++++++

++++++++++ Calculating LD ++++++++++

++++++++++ LD matrix calculated in 1.71 seconds. ++++++++++

Returning lead SNP’s block…

++++++++++ Calculating LD blocks… ++++++++++

Number of SNPs in LD block = 0

  • Fine mapping with SusieR…
## Error in eigen(R, only.values = TRUE): 0 x 0 matrix

Fairfax (2014)

Use the Nalls et al. (2019) LRRK2 locus to query for SNPs in the eQTL data.

# +++++++++++++++ CD14 Stimulation +++++++++++++++ #
dataset_name <- "Fairfax_2014_CD14"
allResults[[dataset_name]] <- finemap_geneList( geneList=c("LRRK2"),  superpopulation = "CAU",
                                                top_SNPs = Nalls_top_SNPs, query_by = "coordinates_merged",
                   fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                   # fullSS_path=  "Data/eQTL/Fairfax/cis.eqtls.fairfax.all.chr.CD14.47231.414.b.qced.f.txt",
                   chrom_col = "SNP", position_col = "SNP", snp_col="SNP",
                   pval_col="p-value", effect_col="beta", gene_col="gene", 
                   stderr_col = "calculate",  tstat_col = "t-stat",
                   vcf_folder = vcf_folder, num_causal = 1, 
                   force_new_subset = force_new_subset,
                   LD_block = T)

LRRK2

  • Extracting SNPs flanking lead SNP… Extracting relevant variants from fullSS…

++ Query Method: ‘coordinates_merged’ —Min snp position: 40114434 — —Max snp position: 41114434 —

cat /hpc/users/rajt01/ad-omics/data/fairfax/sumstats/cis.eqtls.fairfax.all.chr.CD14.47231.414.b.qced.f.txt | tr -s ‘:’ ‘’ | awk -F ‘’ ‘NR==1 {print “CHR POS” $2" " $3" " $4" " $5" " $6 } NR>1 {if($1 == 12 && ($2 >=40114434&& $2 <=41114434)) {print $0}}’ | tr -s ‘:’ ‘’ > /hpc/users/rajt01/ad-omics/data/fairfax/sumstats/LRRK2_EUR_sumstats_subset.txt

Preprocessing SNP subset…

## Warning in data.table::fread(subset_path, sep = file_sep, nrows = 2):
## Detected 1 column names but the data has 2 columns (i.e. invalid file).
## Added 1 extra default column name for the first column which is guessed to
## be row names or an index. Use setnames() afterwards if this guess is not
## correct, or fix the file write command that created the file to create a
## valid file.

Calculating Standard Error…

## Error in `[.data.table`(x, r, vars, with = FALSE): column(s) not found: beta
# +++++++++++++++ IFN Stimulation +++++++++++++++ #
dataset_name <- "Fairfax_2014_IFN"
allResults[[dataset_name]] <- finemap_geneList( geneList=c("LRRK2"),  superpopulation = "CAU",
                                                top_SNPs = Nalls_top_SNPs, query_by = "coordinates_merged",
                   fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                   chrom_col = "SNP", position_col = "SNP", snp_col="SNP",
                   pval_col="p-value", effect_col="beta", gene_col="gene", 
                   stderr_col = "calculate",  tstat_col = "t-stat",
                   vcf_folder = vcf_folder, num_causal = 1, 
                   force_new_subset = force_new_subset,
                   LD_block = T)

LRRK2

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /hpc/users/rajt01/ad-omics/data/fairfax/sumstats/LRRK2_EUR_sumstats_subset.txt …
## Warning in data.table::fread(file_path, nrows = 2, sep = file_sep):
## Detected 1 column names but the data has 2 columns (i.e. invalid file).
## Added 1 extra default column name for the first column which is guessed to
## be row names or an index. Use setnames() afterwards if this guess is not
## correct, or fix the file write command that created the file to create a
## valid file.

Subset file looks good! :) + Creating LD matrix…

## Warning in min(flankingSNPs$POS): no non-missing arguments to min;
## returning Inf
## Warning in max(flankingSNPs$POS): no non-missing arguments to max;
## returning -Inf

LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/LRRK2_subset.vcf

—— Importing VCF as bed file… ——

## Error in eval(e, x, parent.frame()): object 'leadSNP' not found
# +++++++++++++++ LPS Stimulation (after 2 hours) +++++++++++++++ #
dataset_name <- "Fairfax_2014_LPS2"
allResults[[dataset_name]] <- finemap_geneList( geneList=c("LRRK2"),  superpopulation = "CAU",
                                                top_SNPs = Nalls_top_SNPs, query_by = "coordinates_merged",
                   fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                   chrom_col = "SNP", position_col = "SNP", snp_col="SNP",
                   pval_col="p-value", effect_col="beta", gene_col="gene", 
                   stderr_col = "calculate",  tstat_col = "t-stat",
                   vcf_folder = vcf_folder, num_causal = 1, 
                   force_new_subset = force_new_subset,
                   LD_block = T)

LRRK2

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /hpc/users/rajt01/ad-omics/data/fairfax/sumstats/LRRK2_EUR_sumstats_subset.txt …
## Warning in data.table::fread(file_path, nrows = 2, sep = file_sep):
## Detected 1 column names but the data has 2 columns (i.e. invalid file).
## Added 1 extra default column name for the first column which is guessed to
## be row names or an index. Use setnames() afterwards if this guess is not
## correct, or fix the file write command that created the file to create a
## valid file.

Subset file looks good! :) + Creating LD matrix…

## Warning in min(flankingSNPs$POS): no non-missing arguments to min;
## returning Inf

## Warning in min(flankingSNPs$POS): no non-missing arguments to max;
## returning -Inf

LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/LRRK2_subset.vcf

—— Importing VCF as bed file… ——

## Error in eval(e, x, parent.frame()): object 'leadSNP' not found
# +++++++++++++++ LPS Stimulation (after 24 hours) +++++++++++++++ #
dataset_name <- "Fairfax_2014_LPS24"
allResults[[dataset_name]] <- finemap_geneList( geneList=c("LRRK2"),  superpopulation = "CAU",
                                                top_SNPs = Nalls_top_SNPs, query_by = "coordinates_merged",
                   fullSS_path = Directory_info(dataset_name, "fullSumStats"),
                   chrom_col = "SNP", position_col = "SNP", snp_col="SNP",
                   pval_col="p-value", effect_col="beta", gene_col="gene", 
                   stderr_col = "calculate",  tstat_col = "t-stat",
                   vcf_folder = vcf_folder, num_causal = 1, 
                   force_new_subset = force_new_subset,
                   LD_block = T)

LRRK2

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /hpc/users/rajt01/ad-omics/data/fairfax/sumstats/LRRK2_EUR_sumstats_subset.txt …
## Warning in data.table::fread(file_path, nrows = 2, sep = file_sep):
## Detected 1 column names but the data has 2 columns (i.e. invalid file).
## Added 1 extra default column name for the first column which is guessed to
## be row names or an index. Use setnames() afterwards if this guess is not
## correct, or fix the file write command that created the file to create a
## valid file.

Subset file looks good! :) + Creating LD matrix…

## Warning in min(flankingSNPs$POS): no non-missing arguments to min;
## returning Inf

## Warning in min(flankingSNPs$POS): no non-missing arguments to max;
## returning -Inf

LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/LRRK2_subset.vcf

—— Importing VCF as bed file… ——

## Error in eval(e, x, parent.frame()): object 'leadSNP' not found

Merge Results

merged_results <- merge_finemapping_results(allResults, csv_path ="Results/merged_finemapping_results.csv")
createDT(merged_results, caption = "Finemapped Credible Sets for Multiple Datasets")
# table(as.character(merged_results$SNP))